﻿\begin{thebibliography}{99}


% ESTScan paper. Initials are space-separated so abbreviating styles keep both;
% acronyms are braced against sentence-casing. The paper has three authors, so
% the exporter-added "and others" is dropped.
@inproceedings{iseli1999estscan,
  author    = {Iseli, C. and Jongeneel, C. V. and Bucher, P.},
  title     = {{ESTScan}: a program for detecting, evaluating, and reconstructing potential coding regions in {EST} sequences},
  booktitle = {Proceedings of the International Conference on Intelligent Systems for Molecular Biology},
  volume    = {7},
  pages     = {138--148},
  year      = {1999},
}


% Booktitle un-inverted from the IEEE Xplore export form; "Grid" braced to keep its capital.
@inproceedings{karo2001applying,
  author       = {Karo, M. and Dwan, C. and Freeman, J. and Weissman, J. and Livny, M. and Retzel, E.},
  title        = {Applying {Grid} technologies to bioinformatics},
  booktitle    = {Proceedings of the 10th {IEEE} International Symposium on High Performance Distributed Computing},
  pages        = {441--442},
  year         = {2001},
  organization = {IEEE},
}


@article{taylor2010overview,
  author    = {Taylor, R. C.},
  title     = {An overview of the {Hadoop/MapReduce/HBase} framework and its current applications in bioinformatics},
  journal   = {BMC Bioinformatics},
  volume    = {11},
  number    = {Suppl 12},
  pages     = {S1},
  year      = {2010},
  publisher = {BioMed Central Ltd},
}


@article{kryazhimskiy2008population,
  author    = {Kryazhimskiy, S. and Plotkin, J. B.},
  title     = {The population genetics of {dN/dS}},
  journal   = {PLoS Genetics},
  volume    = {4},
  number    = {12},
  pages     = {e1000304},
  year      = {2008},
  publisher = {Public Library of Science},
}


% arXiv preprint: the identifier lives in eprint/archiveprefix instead of a fake
% journal name; howpublished keeps it visible under classic styles that ignore
% eprint. "Chue Hong" is a compound surname that the exporter had split.
@misc{aruliah2012best,
  author        = {Aruliah, D. A. and Brown, C. T. and Chue Hong, N. P. and Davis, M. and Guy, R. T. and Haddock, S. H. D. and Huff, K. and Mitchell, I. and Plumbley, M. and Waugh, B. and others},
  title         = {Best Practices for Scientific Computing},
  howpublished  = {arXiv preprint arXiv:1210.0530},
  eprint        = {1210.0530},
  archiveprefix = {arXiv},
  year          = {2012},
}


@article{carmichael2011biocompute,
  author    = {Carmichael, R. and Braga-Henebry, P. and Thain, D. and Emrich, S.},
  title     = {Biocompute 2.0: an improved collaborative workspace for data intensive bio-science},
  journal   = {Concurrency and Computation: Practice and Experience},
  publisher = {Wiley Online Library},
  year      = {2011},
  volume    = {23},
  number    = {17},
  pages     = {2305--2314},
}

@inproceedings{albrecht2012makeflow,
  author    = {Albrecht, M. and Donnelly, P. and Bui, P. and Thain, D.},
  title     = {Makeflow: A Portable Abstraction for Data Intensive Computing on Clusters, Clouds, and Grids},
  booktitle = {Workshop on Scalable Workflow Enactment Engines and Technologies (SWEET) at ACM SIGMOD},
  year      = {2012},
}

% The raw Unicode em dash is replaced by the LaTeX ligature so classic 8-bit BibTeX handles it.
@article{feldman1979make,
  author    = {Feldman, S. I.},
  title     = {Make---A program for maintaining computer programs},
  journal   = {Software: Practice and Experience},
  volume    = {9},
  number    = {4},
  pages     = {255--265},
  year      = {1979},
  publisher = {Wiley Online Library},
}


% Booktitle un-inverted from the IEEE Xplore export form.
@inproceedings{zhao2012designing,
  author       = {Zhao, Y. and Zhang, Y. and Tian, W. and Xue, R. and Lin, C.},
  title        = {Designing and Deploying a Scientific Computing Cloud Platform},
  booktitle    = {Proceedings of the 13th {ACM/IEEE} International Conference on Grid Computing ({GRID})},
  pages        = {104--113},
  year         = {2012},
  organization = {IEEE},
}


% Booktitle un-inverted from the IEEE Xplore export form.
@inproceedings{gonzalez2007web,
  author       = {Gonzalez, G. and Balasooriya, J.},
  title        = {Web service orchestration for bioinformatics systems: challenges and current workflow definition approaches},
  booktitle    = {Proceedings of the {IEEE} International Conference on Web Services ({ICWS} 2007)},
  pages        = {1226--1227},
  year         = {2007},
  organization = {IEEE},
}


@article{orvis2010ergatis,
  author    = {Orvis, J. and Crabtree, J. and Galens, K. and Gussman, A. and Inman, J. M. and Lee, E. and Nampally, S. and Riley, D. and Sundaram, J. P. and Felix, V. and others},
  title     = {Ergatis: a web interface and scalable software system for bioinformatics workflows},
  journal   = {Bioinformatics},
  volume    = {26},
  number    = {12},
  pages     = {1488--1492},
  year      = {2010},
  publisher = {Oxford Univ Press},
}


% Software tools cited by project URL (no formal publication given).
% These were free-text lines starting with an at-sign, which BibTeX rejects as
% malformed entries; they are now proper misc entries with new citation keys.
% Classic styles ignore the url field, so use a url-aware style to print it.
@misc{mayer2010phobos,
  author = {Mayer, Christoph},
  title  = {Phobos 3.3.11},
  year   = {2010},
  note   = {Software, 2006--2010},
  url    = {http://www.rub.de/spezzoo/cm/cm_phobos.htm},
}


@misc{abajian1994sputnik,
  author = {Abajian, C.},
  title  = {{SPUTNIK}},
  year   = {1994},
  note   = {Software},
  url    = {http://www.abajian.com/sputnik},
}


% No venue is recorded for this work, so it is typed as misc: an article entry
% without a journal triggers a missing-field warning and renders a dangling comma.
% TODO(review): switch to the proper entry type once the venue is known.
@misc{yu2011managing,
  author = {Yu, L. and Carmichael, R. and Emrich, S. and Thain, D.},
  title  = {Managing Worker Pools for Cloud Workflows},
  year   = {2011},
}


@article{giardine2005galaxy,
  author    = {Giardine, B. and Riemer, C. and Hardison, R. C. and Burhans, R. and Elnitski, L. and Shah, P. and Zhang, Y. and Blankenberg, D. and Albert, I. and Taylor, J. and others},
  title     = {Galaxy: a platform for interactive large-scale genome analysis},
  journal   = {Genome Research},
  volume    = {15},
  number    = {10},
  pages     = {1451--1455},
  year      = {2005},
  publisher = {Cold Spring Harbor Lab},
}


% The school field now names the campus: the dissertation was submitted at UC Irvine.
@phdthesis{fielding2000architectural,
  author = {Fielding, R. T.},
  title  = {Architectural styles and the design of network-based software architectures},
  school = {University of California, Irvine},
  year   = {2000},
}


@article{krasner1988description,
  author  = {Krasner, G. E. and Pope, S. T.},
  title   = {A description of the model-view-controller user interface paradigm in the {Smalltalk-80} system},
  journal = {Journal of Object Oriented Programming},
  volume  = {1},
  number  = {3},
  pages   = {26--49},
  year    = {1988},
}


@inproceedings{prabhu2011survey,
  author       = {Prabhu, P. and Kim, H. and Oh, T. and Jablin, T. B. and Johnson, N. P. and Zoufaly, M. and Raman, A. and Liu, F. and Walker, D. and Zhang, Y. and others},
  title        = {A survey of the practice of computational science},
  booktitle    = {State of the Practice Reports},
  pages        = {19},
  year         = {2011},
  organization = {ACM},
}


@article{thain2009chirp,
  author    = {Thain, D. and Moretti, C. and Hemmes, J.},
  title     = {Chirp: a practical global filesystem for cluster and {Grid} computing},
  journal   = {Journal of Grid Computing},
  volume    = {7},
  number    = {1},
  pages     = {51--72},
  year      = {2009},
  publisher = {Springer},
}


@article{yu2010harnessing,
  author    = {Yu, L. and Moretti, C. and Thrasher, A. and Emrich, S. and Judd, K. and Thain, D.},
  title     = {Harnessing parallelism in multicore clusters with the {All-Pairs}, {Wavefront}, and {Makeflow} abstractions},
  journal   = {Cluster Computing},
  volume    = {13},
  number    = {3},
  pages     = {243--256},
  year      = {2010},
  publisher = {Springer},
}


@article{conesa2005blast2go,
  author    = {Conesa, A. and G{\"o}tz, S. and Garc{\'\i}a-G{\'o}mez, J. M. and Terol, J. and Tal{\'o}n, M. and Robles, M.},
  title     = {{Blast2GO}: a universal tool for annotation, visualization and analysis in functional genomics research},
  journal   = {Bioinformatics},
  volume    = {21},
  number    = {18},
  pages     = {3674--3676},
  year      = {2005},
  publisher = {Oxford Univ Press},
}


@article{zdobnov2001interproscan,
  author    = {Zdobnov, E. M. and Apweiler, R.},
  title     = {{InterProScan}--an integration platform for the signature-recognition methods in {InterPro}},
  journal   = {Bioinformatics},
  volume    = {17},
  number    = {9},
  pages     = {847--848},
  year      = {2001},
  publisher = {Oxford Univ Press},
}


% Volume, issue and pages added from memory of the published record
% (Nat Rev Genet 12(10), 671-682). NOTE(review): confirm against the journal.
@article{martin2011next,
  author    = {Martin, J. A. and Wang, Z.},
  title     = {Next-generation transcriptome assembly},
  journal   = {Nature Reviews Genetics},
  volume    = {12},
  number    = {10},
  pages     = {671--682},
  year      = {2011},
  publisher = {Nature Publishing Group},
}


@article{johnson2008ncbi,
  author    = {Johnson, M. and Zaretskaya, I. and Raytselis, Y. and Merezhuk, Y. and McGinnis, S. and Madden, T. L.},
  title     = {{NCBI} {BLAST}: a better web interface},
  journal   = {Nucleic Acids Research},
  volume    = {36},
  number    = {suppl 2},
  pages     = {W5--W9},
  year      = {2008},
  publisher = {Oxford Univ Press},
}


@article{lawson2007vectorbase,
  author    = {Lawson, D. and Arensburger, P. and Atkinson, P. and Besansky, N. J. and Bruggner, R. V. and Butler, R. and Campbell, K. S. and Christophides, G. K. and Christley, S. and Dialynas, E. and others},
  title     = {{VectorBase}: a home for invertebrate vectors of human pathogens},
  journal   = {Nucleic Acids Research},
  volume    = {35},
  number    = {suppl 1},
  pages     = {D503--D505},
  year      = {2007},
  publisher = {Oxford Univ Press},
}

% Typographic quotes replaced by LaTeX quote ligatures (ASCII-safe for classic BibTeX);
% initials separated so "AJ" is not treated as a one-word given name.
@article{jeffreys1985individual,
  author  = {Jeffreys, A. J. and Wilson, V. and Thein, S. L.},
  title   = {Individual-specific `fingerprints' of human {DNA}},
  journal = {Nature},
  volume  = {316},
  number  = {6023},
  pages   = {76--79},
  year    = {1985},
}

@article{koressaar2007enhancements,
  author    = {Koressaar, T. and Remm, M.},
  title     = {Enhancements and modifications of primer design program {Primer3}},
  journal   = {Bioinformatics},
  volume    = {23},
  number    = {10},
  pages     = {1289--1291},
  year      = {2007},
  publisher = {Oxford Univ Press},
}

% Title: missing space restored and quotes made ASCII-safe. The paper has three
% authors, so "and others" is dropped. "London" was a city in the publisher field;
% it is replaced by the publisher name the file's other Nature entries use.
@article{jeffreys1985hypervariable,
  author    = {Jeffreys, A. J. and Wilson, V. and Thein, S. L.},
  title     = {Hypervariable `minisatellite' regions in human {DNA}},
  journal   = {Nature},
  volume    = {314},
  number    = {6006},
  pages     = {67--73},
  year      = {1985},
  publisher = {Nature Publishing Group},
}

@article{tautz1984simple,
  author    = {Tautz, D. and Renz, M.},
  title     = {Simple sequences are ubiquitous repetitive components of eukaryotic genomes},
  journal   = {Nucleic Acids Research},
  publisher = {Oxford Univ Press},
  year      = {1984},
  volume    = {12},
  number    = {10},
  pages     = {4127--4138},
}

% The single hyphen after the tool name was standing in for a dash.
@article{holm2010snpexp,
  author    = {Holm, K. and Melum, E. and Franke, A. and Karlsen, T. H.},
  title     = {{SNPexp}---A web tool for calculating and visualizing correlation between {HapMap} genotypes and gene expression levels},
  journal   = {BMC Bioinformatics},
  volume    = {11},
  number    = {1},
  pages     = {600},
  year      = {2010},
  publisher = {BioMed Central Ltd},
}


@article{nekrutenko2002ka,
  author    = {Nekrutenko, A. and Makova, K. D. and Li, W. H.},
  title     = {The {KA/KS} ratio test for assessing the protein-coding potential of genomic regions: an empirical and simulation study},
  journal   = {Genome Research},
  volume    = {12},
  number    = {1},
  pages     = {198--202},
  year      = {2002},
  publisher = {Cold Spring Harbor Lab},
}


@article{brockman2008quality,
  author    = {Brockman, W. and Alvarez, P. and Young, S. and Garber, M. and Giannoukos, G. and Lee, W. L. and Russ, C. and Lander, E. S. and Nusbaum, C. and Jaffe, D. B.},
  title     = {Quality scores and {SNP} detection in sequencing-by-synthesis systems},
  journal   = {Genome Research},
  volume    = {18},
  number    = {5},
  pages     = {763--770},
  year      = {2008},
  publisher = {Cold Spring Harbor Lab},
}


@article{hahn2009gene,
  author    = {Hahn, D. A. and Ragland, G. J. and Shoemaker, D. D. W. and Denlinger, D. L.},
  title     = {Gene discovery using massively parallel pyrosequencing to develop {ESTs} for the flesh fly {Sarcophaga crassipalpis}},
  journal   = {BMC Genomics},
  volume    = {10},
  number    = {1},
  pages     = {234},
  year      = {2009},
  publisher = {BioMed Central Ltd},
}


% Booktitle un-inverted from the IEEE Xplore export form; the hyphen after
% "Condor" was standing in for a dash.
@inproceedings{litzkow1988condor,
  author       = {Litzkow, M. J. and Livny, M. and Mutka, M. W.},
  title        = {Condor---a hunter of idle workstations},
  booktitle    = {Proceedings of the 8th International Conference on Distributed Computing Systems},
  pages        = {104--111},
  year         = {1988},
  organization = {IEEE},
}


% Repaired a garbled export: mis-encoded quotation marks had become ordinal-indicator
% macros inside the title, and the leading "J" of the journal name had been left
% in the title. The citation key is unchanged so existing cites still resolve.
% Issue number added from memory. NOTE(review): confirm against the journal.
@article{smith1981textordfeminineidentification,
  author  = {Smith, T. F. and Waterman, M. S.},
  title   = {Identification of Common Molecular Subsequences},
  journal = {Journal of Molecular Biology},
  volume  = {147},
  number  = {1},
  pages   = {195--197},
  year    = {1981},
}


% Surnames were stored in all caps; casing is the style's job, not the database's.
@article{fukunishi2001amino,
  author    = {Fukunishi, Y. and Hayashizaki, Y.},
  title     = {Amino acid translation program for full-length {cDNA} sequences with frameshift errors},
  journal   = {Physiological Genomics},
  volume    = {5},
  number    = {2},
  pages     = {81--87},
  year      = {2001},
  publisher = {Am Physiological Soc},
}


@article{kawai2001functional,
  author    = {Kawai, J. and Shinagawa, A. and Shibata, K. and Yoshino, M. and Itoh, M. and Ishii, Y. and Arakawa, T. and Hara, A. and Fukunishi, Y. and Konno, H. and others},
  title     = {Functional annotation of a full-length mouse {cDNA} collection},
  journal   = {Nature},
  volume    = {409},
  number    = {6821},
  pages     = {685--690},
  year      = {2001},
  publisher = {Nature Publishing Group},
}


#Previous work on identifying conservation over orthologs
% 1553-0833 is the series ISSN, not an issue number, so it moves to the issn field.
@inproceedings{blanchette2000exact,
  author    = {Blanchette, M. and Schwikowski, B. and Tompa, M.},
  title     = {An exact algorithm to identify motifs in orthologous sequences from multiple species},
  booktitle = {Proceedings of the International Conference on Intelligent Systems for Molecular Biology},
  volume    = {8},
  pages     = {37--45},
  year      = {2000},
  issn      = {1553-0833},
}


@article{abascal2010translatorx,
  author    = {Abascal, F. and Zardoya, R. and Telford, M. J.},
  title     = {{TranslatorX}: multiple alignment of nucleotide sequences guided by amino acid translations},
  journal   = {Nucleic Acids Research},
  volume    = {38},
  number    = {suppl 2},
  pages     = {W7--W13},
  year      = {2010},
  publisher = {Oxford Univ Press},
}


@article{ning2001ssaha,
  author    = {Ning, Z. and Cox, A. J. and Mullikin, J. C.},
  title     = {{SSAHA}: a fast search method for large {DNA} databases},
  journal   = {Genome Research},
  volume    = {11},
  number    = {10},
  pages     = {1725--1729},
  year      = {2001},
  publisher = {Cold Spring Harbor Lab},
}


% A conference paper: the proceedings name belongs in booktitle, not journal.
% The former volume value only repeated the year and is dropped.
@inproceedings{darling2003design,
  author    = {Darling, A. and Carey, L. and Feng, W.},
  title     = {The design, implementation, and evaluation of {mpiBLAST}},
  booktitle = {Proceedings of ClusterWorld},
  year      = {2003},
}


@article{dumontier2002nblast,
  author    = {Dumontier, M. and Hogue, C.},
  title     = {{NBLAST}: a cluster variant of {BLAST} for {NxN} comparisons},
  journal   = {BMC Bioinformatics},
  volume    = {3},
  number    = {1},
  pages     = {13},
  year      = {2002},
  publisher = {BioMed Central Ltd},
}


@article{oinn2004taverna,
  author    = {Oinn, T. and Addis, M. and Ferris, J. and Marvin, D. and Senger, M. and Greenwood, M. and Carver, T. and Glover, K. and Pocock, M. R. and Wipat, A. and others},
  title     = {Taverna: a tool for the composition and enactment of bioinformatics workflows},
  journal   = {Bioinformatics},
  volume    = {20},
  number    = {17},
  pages     = {3045--3054},
  year      = {2004},
  publisher = {Oxford Univ Press},
}


@article{rumble2009shrimp,
  author    = {Rumble, S. M. and Lacroute, P. and Dalca, A. V. and Fiume, M. and Sidow, A. and Brudno, M.},
  title     = {{SHRiMP}: accurate mapping of short color-space reads},
  journal   = {PLoS Computational Biology},
  volume    = {5},
  number    = {5},
  pages     = {e1000386},
  year      = {2009},
  publisher = {Public Library of Science},
}


% "Chin Jr, G." parsed with "Chin Jr" as the surname; the three-part
% "Last, Jr., First" form makes the suffix explicit.
@article{moreau2008special,
  author    = {Moreau, L. and Lud{\"a}scher, B. and Altintas, I. and Barga, R. S. and Bowers, S. and Callahan, S. and Chin, Jr., G. and Clifford, B. and Cohen, S. and Cohen-Boulakia, S. and others},
  title     = {Special issue: The first provenance challenge},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {20},
  number    = {5},
  pages     = {409--418},
  year      = {2008},
  publisher = {Wiley Online Library},
}


@inproceedings{carmichael2010biocompute,
  author       = {Carmichael, R. and Braga-Henebry, P. and Thain, D. and Emrich, S.},
  title        = {Biocompute: towards a collaborative workspace for data intensive bio-science},
  booktitle    = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  organization = {ACM},
  year         = {2010},
  pages        = {489--498},
}


@article{delcher1999improved,
  author    = {Delcher, A. L. and Harmon, D. and Kasif, S. and White, O. and Salzberg, S. L.},
  title     = {Improved microbial gene identification with {GLIMMER}},
  journal   = {Nucleic Acids Research},
  volume    = {27},
  number    = {23},
  pages     = {4636--4641},
  year      = {1999},
  publisher = {Oxford Univ Press},
}


@article{wasmuth2004prot4est,
  author    = {Wasmuth, J. D. and Blaxter, M. L.},
  title     = {{prot4EST}: translating expressed sequence tags from neglected genomes},
  journal   = {BMC Bioinformatics},
  volume    = {5},
  number    = {1},
  pages     = {187},
  year      = {2004},
  publisher = {BioMed Central Ltd},
}


% Volume, issue and pages added from memory of the published record
% (J Mol Biol 422(3), 328-335). NOTE(review): confirm against the journal.
@article{spencer2012silent,
  author    = {Spencer, P. S. and Siller, E. and Anderson, J. F. and Barral, J. M.},
  title     = {Silent substitutions predictably alter translation elongation rates and protein folding efficiencies},
  journal   = {Journal of Molecular Biology},
  volume    = {422},
  number    = {3},
  pages     = {328--335},
  year      = {2012},
  publisher = {Elsevier},
}


@inproceedings{bui2010weaver,
  author       = {Bui, P. and Yu, L. and Thain, D.},
  title        = {Weaver: Integrating distributed computing abstractions into scientific workflows using {Python}},
  booktitle    = {Proceedings of the 19th ACM International Symposium on High Performance Distributed Computing},
  pages        = {636--643},
  year         = {2010},
  organization = {ACM},
}


% The exported range 197--197 is an indexing artifact. The article sits in the
% issue's roman-numbered section; r49--r62 is given from memory.
% NOTE(review): confirm the page range against the journal.
@article{grantham1980codon,
  author    = {Grantham, R. and Gautier, C. and Gouy, M. and Mercier, R. and Pave, A.},
  title     = {Codon catalog usage and the genome hypothesis},
  journal   = {Nucleic Acids Research},
  volume    = {8},
  number    = {1},
  pages     = {r49--r62},
  year      = {1980},
  publisher = {Oxford Univ Press},
}


@article{duret2002evolution,
  author    = {Duret, L.},
  title     = {Evolution of synonymous codon usage in metazoans},
  journal   = {Current Opinion in Genetics \& Development},
  volume    = {12},
  number    = {6},
  pages     = {640--649},
  year      = {2002},
  publisher = {Elsevier},
}


@article{smith2001translationally,
  author    = {Smith, N. G. C. and Eyre-Walker, A.},
  title     = {Why are translationally sub-optimal synonymous codons used in {Escherichia coli}?},
  journal   = {Journal of Molecular Evolution},
  volume    = {53},
  number    = {3},
  pages     = {225--236},
  year      = {2001},
  publisher = {Springer},
}


@article{clarke2008rare,
  author    = {Clarke, T. F. and Clark, P. L.},
  title     = {Rare codons cluster},
  journal   = {PLoS ONE},
  volume    = {3},
  number    = {10},
  pages     = {e3412},
  year      = {2008},
  publisher = {Public Library of Science},
}


@article{li2003orthomcl,
  author    = {Li, L. and Stoeckert, C. J. and Roos, D. S.},
  title     = {{OrthoMCL}: identification of ortholog groups for eukaryotic genomes},
  journal   = {Genome Research},
  volume    = {13},
  number    = {9},
  pages     = {2178--2189},
  year      = {2003},
  publisher = {Cold Spring Harbor Lab},
}


% The publisher field held a library-catalogue imprint string; it is reduced to
% the publisher name. The paper has three authors, so "and others" is dropped.
@article{rice2000emboss,
  author    = {Rice, P. and Longden, I. and Bleasby, A.},
  title     = {{EMBOSS}: the {European Molecular Biology Open Software Suite}},
  journal   = {Trends in Genetics},
  volume    = {16},
  number    = {6},
  pages     = {276--277},
  year      = {2000},
  publisher = {Elsevier},
}


@article{edgar2004muscle,
  author    = {Edgar, R. C.},
  title     = {{MUSCLE}: multiple sequence alignment with high accuracy and high throughput},
  journal   = {Nucleic Acids Research},
  volume    = {32},
  number    = {5},
  pages     = {1792--1797},
  year      = {2004},
  publisher = {Oxford Univ Press},
}


@article{miller2008aggressive,
  author    = {Miller, J. R. and Delcher, A. L. and Koren, S. and Venter, E. and Walenz, B. P. and Brownley, A. and Johnson, J. and Li, K. and Mobarry, C. and Sutton, G.},
  title     = {Aggressive assembly of pyrosequencing reads with mates},
  journal   = {Bioinformatics},
  volume    = {24},
  number    = {24},
  pages     = {2818--2824},
  year      = {2008},
  publisher = {Oxford Univ Press},
}


% Booktitle un-inverted from the IEEE Xplore export form; tool names braced.
@inproceedings{thrasher2010taming,
  author       = {Thrasher, A. and Carmichael, R. and Bui, P. and Yu, L. and Thain, D. and Emrich, S.},
  title        = {Taming complex bioinformatics workflows with {Weaver}, {Makeflow}, and {Starch}},
  booktitle    = {Proceedings of the 5th Workshop on Workflows in Support of Large-Scale Science ({WORKS})},
  pages        = {1--6},
  year         = {2010},
  organization = {IEEE},
}


@inproceedings{yu2012resource,
  author       = {Yu, L. and Thain, D.},
  title        = {Resource Management for Elastic Cloud Workflows},
  booktitle    = {Proceedings of the 2012 12th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (ccgrid 2012)},
  organization = {IEEE Computer Society},
  year         = {2012},
  pages        = {775--780},
}


@article{chartier2012large,
  author    = {Chartier, M. and Gaudreault, F. and Najmanovich, R.},
  title     = {Large-scale analysis of conserved rare codon clusters suggests an involvement in co-translational molecular recognition events},
  journal   = {Bioinformatics},
  publisher = {Oxford Univ Press},
  year      = {2012},
  volume    = {28},
  number    = {11},
  pages     = {1438--1445},
}


@article{zhang2012codon,
  author    = {Zhang, Z. and Li, J. and Cui, P. and Ding, F. and Li, A. and Townsend, J. P. and Yu, J.},
  title     = {{Codon Deviation Coefficient}: a novel measure for estimating codon usage bias and its statistical significance},
  journal   = {BMC Bioinformatics},
  volume    = {13},
  number    = {1},
  pages     = {43},
  year      = {2012},
  publisher = {BioMed Central Ltd},
}


#Transcriptome Analysis Citations
@article{lazzari2008version,
  author    = {Lazzari, B. and Caprera, A. and Vecchietti, A. and Merelli, I. and Barale, F. and Milanesi, L. and Stella, A. and Pozzi, C.},
  title     = {Version {VI} of the {ESTree} db: an improved tool for peach transcriptome analysis},
  journal   = {BMC Bioinformatics},
  volume    = {9},
  number    = {Suppl 2},
  pages     = {S9},
  year      = {2008},
  publisher = {BioMed Central Ltd},
}


@article{bateman2004pfam,
  author    = {Bateman, A. and Coin, L. and Durbin, R. and Finn, R. D. and Hollich, V. and Griffiths-Jones, S. and Khanna, A. and Marshall, M. and Moxon, S. and Sonnhammer, E. L. L. and others},
  title     = {The {Pfam} protein families database},
  journal   = {Nucleic Acids Research},
  volume    = {32},
  number    = {suppl 1},
  pages     = {D138--D141},
  year      = {2004},
  publisher = {Oxford Univ Press},
}


@article{eddy2011accelerated,
  author    = {Eddy, S. R.},
  title     = {Accelerated profile {HMM} searches},
  journal   = {PLoS Computational Biology},
  volume    = {7},
  number    = {10},
  pages     = {e1002195},
  year      = {2011},
  publisher = {Public Library of Science},
}


\end{thebibliography}\end{document}
